{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from sklearn.ensemble import GradientBoostingRegressor"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 获取训练数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "      <th>5</th>\n",
       "      <th>6</th>\n",
       "      <th>7</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.005988</td>\n",
       "      <td>0.569231</td>\n",
       "      <td>0.647059</td>\n",
       "      <td>0.951220</td>\n",
       "      <td>-0.225434</td>\n",
       "      <td>0.837989</td>\n",
       "      <td>0.357258</td>\n",
       "      <td>-0.003058</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.161677</td>\n",
       "      <td>0.743195</td>\n",
       "      <td>0.682353</td>\n",
       "      <td>0.960976</td>\n",
       "      <td>-0.086705</td>\n",
       "      <td>0.780527</td>\n",
       "      <td>0.282945</td>\n",
       "      <td>0.149847</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.113772</td>\n",
       "      <td>0.744379</td>\n",
       "      <td>0.541176</td>\n",
       "      <td>0.990244</td>\n",
       "      <td>-0.005780</td>\n",
       "      <td>0.721468</td>\n",
       "      <td>0.434110</td>\n",
       "      <td>-0.318043</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.053892</td>\n",
       "      <td>0.608284</td>\n",
       "      <td>0.764706</td>\n",
       "      <td>0.951220</td>\n",
       "      <td>-0.248555</td>\n",
       "      <td>0.821229</td>\n",
       "      <td>0.848604</td>\n",
       "      <td>-0.003058</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.173653</td>\n",
       "      <td>0.866272</td>\n",
       "      <td>0.682353</td>\n",
       "      <td>0.951220</td>\n",
       "      <td>0.017341</td>\n",
       "      <td>0.704709</td>\n",
       "      <td>-0.021002</td>\n",
       "      <td>-0.195719</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          0         1         2         3         4         5         6  \\\n",
       "0  0.005988  0.569231  0.647059  0.951220 -0.225434  0.837989  0.357258   \n",
       "1  0.161677  0.743195  0.682353  0.960976 -0.086705  0.780527  0.282945   \n",
       "2  0.113772  0.744379  0.541176  0.990244 -0.005780  0.721468  0.434110   \n",
       "3  0.053892  0.608284  0.764706  0.951220 -0.248555  0.821229  0.848604   \n",
       "4  0.173653  0.866272  0.682353  0.951220  0.017341  0.704709 -0.021002   \n",
       "\n",
       "          7  \n",
       "0 -0.003058  \n",
       "1  0.149847  \n",
       "2 -0.318043  \n",
       "3 -0.003058  \n",
       "4 -0.195719  "
      ]
     },
     "execution_count": 54,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Parse the training file into a float32 matrix (one sample per row).\n",
    "train_matrix = np.genfromtxt(\"train_feat.txt\", dtype=np.float32)\n",
    "num_feature = len(train_matrix[0])  # total column count of a row, label column included\n",
    "train_table = pd.DataFrame(train_matrix)\n",
    "\n",
    "# The last column holds the regression target.\n",
    "label_col = num_feature - 1\n",
    "train_label = train_table.iloc[:, label_col]\n",
    "\n",
    "# The feature slice stops two columns short of the end, so the column right\n",
    "# before the label is left out of the features as well.\n",
    "# NOTE(review): confirm that skipping that column is intentional.\n",
    "train_feature = train_table.iloc[:, :label_col - 1]\n",
    "train_feature"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    320.0\n",
       "1    361.0\n",
       "2    364.0\n",
       "3    336.0\n",
       "4    358.0\n",
       "Name: 9, dtype: float32"
      ]
     },
     "execution_count": 55,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_label"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 获取测试数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "      <th>5</th>\n",
       "      <th>6</th>\n",
       "      <th>7</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.005988</td>\n",
       "      <td>0.569231</td>\n",
       "      <td>0.647059</td>\n",
       "      <td>0.951220</td>\n",
       "      <td>-0.225434</td>\n",
       "      <td>0.837989</td>\n",
       "      <td>0.357258</td>\n",
       "      <td>-0.003058</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.161677</td>\n",
       "      <td>0.743195</td>\n",
       "      <td>0.682353</td>\n",
       "      <td>0.960976</td>\n",
       "      <td>-0.086705</td>\n",
       "      <td>0.780527</td>\n",
       "      <td>0.282945</td>\n",
       "      <td>0.149847</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.113772</td>\n",
       "      <td>0.744379</td>\n",
       "      <td>0.541176</td>\n",
       "      <td>0.990244</td>\n",
       "      <td>-0.005780</td>\n",
       "      <td>0.721468</td>\n",
       "      <td>0.434110</td>\n",
       "      <td>-0.318043</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.053892</td>\n",
       "      <td>0.608284</td>\n",
       "      <td>0.764706</td>\n",
       "      <td>0.951220</td>\n",
       "      <td>-0.248555</td>\n",
       "      <td>0.821229</td>\n",
       "      <td>0.848604</td>\n",
       "      <td>-0.003058</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.173653</td>\n",
       "      <td>0.866272</td>\n",
       "      <td>0.682353</td>\n",
       "      <td>0.951220</td>\n",
       "      <td>0.017341</td>\n",
       "      <td>0.704709</td>\n",
       "      <td>-0.021002</td>\n",
       "      <td>-0.195719</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          0         1         2         3         4         5         6  \\\n",
       "0  0.005988  0.569231  0.647059  0.951220 -0.225434  0.837989  0.357258   \n",
       "1  0.161677  0.743195  0.682353  0.960976 -0.086705  0.780527  0.282945   \n",
       "2  0.113772  0.744379  0.541176  0.990244 -0.005780  0.721468  0.434110   \n",
       "3  0.053892  0.608284  0.764706  0.951220 -0.248555  0.821229  0.848604   \n",
       "4  0.173653  0.866272  0.682353  0.951220  0.017341  0.704709 -0.021002   \n",
       "\n",
       "          7  \n",
       "0 -0.003058  \n",
       "1  0.149847  \n",
       "2 -0.318043  \n",
       "3 -0.003058  \n",
       "4 -0.195719  "
      ]
     },
     "execution_count": 56,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Parse the test file into a float32 matrix (one sample per row).\n",
    "test_matrix = np.genfromtxt(\"test_feat.txt\", dtype=np.float32)\n",
    "num_feature = len(test_matrix[0])  # total column count of a row, label column included\n",
    "test_table = pd.DataFrame(test_matrix)\n",
    "\n",
    "# The last column holds the regression target.\n",
    "label_col = num_feature - 1\n",
    "test_label = test_table.iloc[:, label_col]\n",
    "\n",
    "# The feature slice stops two columns short of the end, so the column right\n",
    "# before the label is left out of the features as well.\n",
    "# NOTE(review): confirm that skipping that column is intentional.\n",
    "test_feature = test_table.iloc[:, :label_col - 1]\n",
    "test_feature"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    320.0\n",
       "1    361.0\n",
       "2    364.0\n",
       "3    336.0\n",
       "4    358.0\n",
       "Name: 9, dtype: float32"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test_label"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### GBDT模型建立"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "pred: 320.0008173984891  label: 320.0\n",
      "pred: 360.99965033119537  label: 361.0\n",
      "pred: 363.99928183902097  label: 364.0\n",
      "pred: 336.0002344322584  label: 336.0\n",
      "pred: 358.0000159974151  label: 358.0\n",
      "均方误差: 0.0005218003748239915\n"
     ]
    }
   ],
   "source": [
    "# Gradient-boosted regression trees with a least-squares objective.\n",
    "# `loss` is deliberately not passed: squared error is the default, and the old\n",
    "# spelling 'ls' is rejected by newer scikit-learn releases.\n",
    "gbdt = GradientBoostingRegressor(\n",
    "    learning_rate=0.1,\n",
    "    n_estimators=100,\n",
    "    subsample=1.0,\n",
    "    min_samples_split=2,\n",
    "    min_samples_leaf=1,\n",
    "    max_depth=3,\n",
    "    init=None,\n",
    "    random_state=0,  # fixed seed so repeated runs give the same model\n",
    "    max_features=None,\n",
    "    alpha=0.9,\n",
    "    verbose=0,\n",
    "    max_leaf_nodes=None,\n",
    "    warm_start=False,\n",
    ")\n",
    "\n",
    "gbdt.fit(train_feature, train_label)\n",
    "pred = gbdt.predict(test_feature)\n",
    "\n",
    "# Walk predictions and labels positionally over the raw label array, so the\n",
    "# loop does not depend on the Series index labels being 0..n-1.\n",
    "actual_values = test_label.values\n",
    "for predicted, actual in zip(pred, actual_values):\n",
    "    print('pred:', predicted, ' label:', actual)\n",
    "\n",
    "# This is the square root of the mean squared error, i.e. the RMSE, so it is\n",
    "# labelled as root-mean-square error rather than mean squared error.\n",
    "rmse = np.sqrt(((pred - actual_values) ** 2).mean())\n",
    "print('均方根误差:', rmse)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
